Added a PDF info agent

Chris Eidhof 10 years ago
parent
commit
0465835da8
3 changed files with 69 additions and 0 deletions
  1. 1 0
      Gemfile
  2. 6 0
      Gemfile.lock
  3. 62 0
      app/models/agents/pdf_info_agent.rb

+ 1 - 0
Gemfile

@@ -81,6 +81,7 @@ gem 'string-scrub'	# for ruby <2.1
81 81
 gem 'therubyracer', '~> 0.12.1'
82 82
 gem 'typhoeus', '~> 0.6.3'
83 83
 gem 'uglifier', '>= 1.3.0'
84
+gem 'hypdf', '~> 1.0.7'
84 85
 
85 86
 group :development do
86 87
   gem 'better_errors', '~> 1.1'

+ 6 - 0
Gemfile.lock

@@ -174,12 +174,17 @@ GEM
174 174
     hipchat (1.2.0)
175 175
       httparty
176 176
     hpricot (0.8.6)
177
+    httmultiparty (0.3.10)
178
+      httparty (>= 0.7.3)
179
+      multipart-post
177 180
     http (0.5.1)
178 181
       http_parser.rb
179 182
     http_parser.rb (0.6.0)
180 183
     httparty (0.13.1)
181 184
       json (~> 1.8)
182 185
       multi_xml (>= 0.5.2)
186
+    hypdf (1.0.7)
187
+      httmultiparty (= 0.3.10)
183 188
     i18n (0.6.11)
184 189
     jquery-rails (3.1.1)
185 190
       railties (>= 3.0, < 5.0)
@@ -475,6 +480,7 @@ DEPENDENCIES
475 480
   guard-rspec
476 481
   hipchat (~> 1.2.0)
477 482
   httparty (~> 0.13)
483
+  hypdf (~> 1.0.7)
478 484
   jquery-rails (~> 3.1.0)
479 485
   json (~> 1.8.1)
480 486
   jsonpath (~> 0.5.6)

+ 62 - 0
app/models/agents/pdf_info_agent.rb

@@ -0,0 +1,62 @@
1
+require 'open-uri'
2
+require 'hypdf'
3
+
4
module Agents
  # Agent that reacts to incoming events carrying a `url` key in their payload:
  # it downloads the document at that URL, passes it to HyPDF's `pdfinfo`, and
  # emits a new event made of the returned metadata merged with the incoming
  # payload.
  class PdfInfoAgent < Agent
    # Only absolute http(s) URLs are fetched. `\A` anchors at the very start of
    # the string; `^` would also match right after an embedded newline and so
    # accept values such as "|cmd\nhttp://example.com".
    HTTP_URL_PATTERN = %r{\Ahttps?://}i

    gem_dependency_check { defined?(HyPDF) }

    cannot_be_scheduled!

    description <<-MD
      In order for this agent to work, you need to have [HyPDF](https://devcenter.heroku.com/articles/hypdf) running and configured.

      It works by acting on events that contain a key `url` in their payload, and runs the [pdfinfo](https://devcenter.heroku.com/articles/hypdf#pdfinfo) command on them.
    MD

    event_description <<-MD
    This will change based on the metadata in the pdf.

      { "Title"=>"Everyday Rails Testing with RSpec", 
        "Author"=>"Aaron Sumner",
        "Creator"=>"LaTeX with hyperref package",
        "Producer"=>"xdvipdfmx (0.7.8)",
        "CreationDate"=>"Fri Aug  2 05",
        "32"=>"50 2013",
        "Tagged"=>"no",
        "Pages"=>"150",
        "Encrypted"=>"no",
        "Page size"=>"612 x 792 pts (letter)",
        "Optimized"=>"no",
        "PDF version"=>"1.5",
        "url": "your url"
      }
    MD

    # The agent is considered healthy as long as it has no recent error logs.
    def working?
      !recent_error_logs?
    end

    # This agent takes no options.
    def default_options
      {}
    end

    # Handles a batch of incoming events. For every event whose payload has a
    # String `url` starting with http:// or https://, the PDF is fetched and a
    # metadata event is emitted; all other events are ignored.
    #
    # incoming_events - the events to process; each must respond to `payload`.
    def receive(incoming_events)
      incoming_events.each do |event|
        interpolate_with(event) do
          url_to_scrape = event.payload['url']
          # The String check keeps non-String payload values (nil, numbers,
          # arrays) from ever reaching the regexp match.
          next unless url_to_scrape.is_a?(String) && url_to_scrape =~ HTTP_URL_PATTERN

          check_url(url_to_scrape, event.payload)
        end
      end
    end

    # Fetches each URL, runs HyPDF's pdfinfo on the download and emits one
    # event per URL.
    #
    # in_url  - a URL String or an Array of URL Strings; blank input is a no-op.
    # payload - Hash merged over the pdfinfo result (its keys win on conflict).
    def check_url(in_url, payload)
      return unless in_url.present?

      Array(in_url).each do |url|
        log "Fetching #{url}"
        # Go through URI#open (provided by open-uri) rather than Kernel#open:
        # Kernel#open spawns a subprocess when handed a string starting with
        # "|", which must never be reachable from event data.
        # The block form closes the downloaded IO afterwards; this assumes
        # HyPDF.pdfinfo has finished reading it by the time it returns.
        info = URI.parse(url).open { |pdf| HyPDF.pdfinfo(pdf) }
        create_event :payload => info.merge(payload)
      end
    end
  end
end